/* OPTIONS OBS=500 NOREPLACE; */  
 
%include "ASMimplibs.sas";



/** 05/05/2015: Modified to save the files with imputation-type groups and Beta model flags
                to the allcmf library.
                Modified to read in all of the variables needed to compute plant TFP.
***/

data cmf2007_nonar (KEEP=survu_id lbdnum firmid et  form_proc tab tab_f tae tae_f ee ee_f 
           cf cf_f cm cm_f ph ph_f tvs tvs_f ww ww_f pw_f te te_f sw sw_f tib tib_f tie tie_f NAICS_NEW_6 /*
*/           pw_f_missing ww_f_missing ph_f_missing npw_imp nte_imp nww_imp nsw_imp nph_imp 
            ncm_imp ntab_imp ntvs_imp any_impute any_imp_notTIB_TIE any_imp_incl_TAB year);
 set cmf.cmf2007;
year = 2007;
 NAICS_NEW_6 = substr(NAICS_NEW,1,6);
 if ar ne 1 and TABBED="Y";
if pw_F in (" B"," D"," E"," H"," J"," L"," M"," P"," S"," T"," V"," W", /*
          */"RB","RD","RE","RH","RJ","RL","RM","RP","RS","RT","RV","RW") 
then npw_imp = 1; else npw_imp = 0;
if te_F in (" B"," D"," E"," H"," J"," L"," M"," P"," S"," T"," V"," W", /*
          */"RB","RD","RE","RH","RJ","RL","RM","RP","RS","RT","RV","RW") 
then nte_imp = 1; else nte_imp = 0;
if ww_F in (" B"," D"," E"," H"," J"," L"," M"," P"," S"," T"," V"," W", /*
          */"RB","RD","RE","RH","RJ","RL","RM","RP","RS","RT","RV","RW") 
then nww_imp = 1; else nww_imp = 0;
if sw_F in (" B"," D"," E"," H"," J"," L"," M"," P"," S"," T"," V"," W", /*
          */"RB","RD","RE","RH","RJ","RL","RM","RP","RS","RT","RV","RW") 
then nsw_imp = 1; else nsw_imp = 0;
if ph_F in (" B"," D"," E"," H"," J"," L"," M"," P"," S"," T"," V"," W", /*
          */"RB","RD","RE","RH","RJ","RL","RM","RP","RS","RT","RV","RW") 
then nph_imp = 1; else nph_imp = 0;
if cm_F in (" B"," D"," E"," H"," J"," L"," M"," P"," S"," T"," V"," W", /*
          */"RB","RD","RE","RH","RJ","RL","RM","RP","RS","RT","RV","RW") 
then ncm_imp = 1; else ncm_imp = 0;
if tab_F in (" B"," D"," E"," H"," J"," L"," M"," P"," S"," T"," V"," W", /*
           */"RB","RD","RE","RH","RJ","RL","RM","RP","RS","RT","RV","RW") 
then ntab_imp = 1; else ntab_imp = 0;
if tvs_F in (" B"," D"," E"," H"," J"," L"," M"," P"," S"," T"," V"," W", /*
           */"RB","RD","RE","RH","RJ","RL","RM","RP","RS","RT","RV","RW") 
then ntvs_imp = 1; else ntvs_imp = 0;

if ntvs_imp=1 or ncm_imp=1 or nph_imp = 1 or nsw_imp =1 or nww_imp =1 /*
*/ or EE_F in (" V","RV","RH","RJ","RT"," D"," H"," J"," T"," L","RL") /*
*/ or CF_F in (" V","RV","RH","RL","RT"," L"," H"," T") /*
*/ or TIB_F in (" B","RB"," A"," E"," H"," L"," M"," T","RA","RE","RH","RL","RM","RT") /*
*/ or TIE_F in (" B","RB"," A"," E"," H"," L"," M"," V","RA","RE","RH","RL","RM","RT","RV")
then any_impute=1;
else any_impute=0;

if any_impute=1 or tab_f in (" B"," D"," E"," H"," J"," L"," M"," P"," S"," T"," V"," W")
then any_imp_incl_TAB=1;
else any_imp_incl_TAB=0;


if ntvs_imp=1 or ncm_imp=1 or nph_imp = 1 or nsw_imp =1 or nww_imp =1 /*
*/ or EE_F in (" V","RV","RH","RJ","RT"," D"," H"," J"," T"," L","RL") /*
*/ or CF_F in (" V","RV","RH","RL","RT"," L"," H"," T") 
then any_imp_notTIB_TIE=1;
else any_imp_notTIB_TIE=0;


if pw_f = "" then pw_f_missing=1;
else  pw_f_missing=0;
if ww_f = "" then ww_f_missing=1;
else  ww_f_missing=0;
if ph_f = "" then ph_f_missing=1;
else  ph_f_missing=0;
run;

proc sort data=cmf2007_nonar nodupkey; 
by survu_id; 
run;

proc sort data=cmf2007_nonar nodupkey out=cmf2007_nonar_lbdnum;
 by lbdnum;
run;



/***
proc freq data=cmf2007_nonar;
 tables form_proc;
 title "form processed, 2007 CM, non-AR cases only";
run;
***/

proc freq data=cmf2007_nonar;
 tables PW_F TE_F WW_F SW_F PH_F CM_F TVS_F TAB_F tib_f tie_f ;
 title "Edit/Impute flags, 2007 CMF file, non-AR cases only";
run;

/****
proc freq data=cmf2007_nonar;
 tables ET*pw_f_missing ET*WW_F_missing ET*PH_F_missing ;
 title1 "Interaction of ASM indicator with Edit/Impute flags missing indicator";
 title2 "2007 CMF file, non-AR cases only";
run;

proc freq data=cmf2007_nonar;
 tables form_proc*pw_f_missing ;
 title1 "Interaction of form type with PW Edit/Impute flags missing indicator";
 title2 "2007 CMF file, non-AR cases only";
run;

*****/

data asm06 (keep = lbdnum ee cf cm ph tvs ww pw te sw tib tie );
 set asm.asm2006;
 if tabbed="Y";
run;

proc datasets library=work;
modify asm06;
rename ee = ee06  cf=cf06 cm=cm06 ph=ph06 tvs=tvs06 ww=ww06 pw=pw06 te=te06 sw=sw06 tib=tib06 tie=tie06;
run;

proc sort data=asm06 nodupkey; by lbdnum; run;

data cmf07asm06 inasm06notincmf07;
 merge cmf2007_nonar_lbdnum (in=incmf07) asm06 (in=inasm06); 
by lbdnum;
if incmf07 then output cmf07asm06;
else output inasm06notincmf07;
run;


data allcmf.cmf07asm06;
 set cmf07asm06;
  PW_F2nd=substr(PW_F,2,1); 
  TE_F2nd=substr(TE_F,2,1); 
  WW_F2nd=substr(WW_F,2,1); 
  SW_F2nd=substr(SW_F,2,1); 
  PH_F2nd=substr(PH_F,2,1); 
  CM_F2nd=substr(CM_F,2,1); 
  EE_F2nd=substr(EE_F,2,1); 
  CF_F2nd=substr(CF_F,2,1); 
  TVS_F2nd=substr(TVS_F,2,1); 
  TAB_F2nd=substr(TAB_F,2,1);


  IF SW_F="R "
  then SW_Fgroup='reported_edit_passing';
  else if SW_F in (" AJ","RA","RAJ","RAA"," A"," AA"," CA","RAQ","RCA")
  then SW_Fgroup='ad rec impute';
  else if SW_F in (" 1"," 2"," 9"," AQ"," C"," CC"," CQ",""," G","R2","R4","R9","RC","RG","RN","RC","RCC","RCJ")
  then SW_Fgroup='other non-impute';
  else if SW_F in (" CJ"," H"," HQ"," J","RH","RJ","RJJ","RP","RJJ","RHQ","RHQ")
  then SW_Fgroup='other impute';
  ELSE IF SW_F in ("RL"," L","RLQ")
  then SW_Fgroup='logical impute';

  if TE_F="R "
  then TE_Fgroup='reported edit-passing';
  ELSE IF TE_F IN ("RL"," L")
  then TE_Fgroup='logical impute';
  else IF TE_F IN ("RB"," B")
  then do; 
      TE_Fgroup='Beta regression';
      if sw06 ne . and te06 ne . then TE_Beta_model7=1;
      else TE_Beta_model7=0;
  end;
  else if TE_F in (" A"," CA","RA","RAQ" )
  then TE_Fgroup = 'from ad rec data';
  else if TE_F in (""," 1"," 9"," B1"," C"," G"," JF","R2","RBF","RC","RC1","RG")
  then TE_Fgroup='other non-imputed';
  else if TE_F in (" H"," J"," M"," P"," V","RE","REF","RH","RJ","RJF","RLQ","RP")
  then TE_Fgroup='other imputed';

  if TVS_F="R "
  then TVS_Fgroup='reported_edit_passing';
  else IF TVS_F in (" B","RB")
  then do;
      TVS_Fgroup='Beta_regression';
      if sw06 ne . and tvs06 ne . then TVS_Beta_model7=1;
      else TVS_Beta_model7=0;
  end;
  else if TVS_F in (" A"," CA","RA" )
  then TVS_Fgroup = 'from ad rec data';
  else if TVS_F in (" V","RV")
  then TVS_Fgroup = 'industry average';
  else if TVS_F in (" E"," H"," J"," L", " M"," P"," S"," HK","RL","RE","RH","RJ","RM","RP","RS")
  then TVS_Fgroup='other impute ';
  else if TVS_F in (" ",""," C"," G"," CF"," MC"," U"," AC"," BC"," C"," N"," X","RC","RCI","RG","RGF","RN","RX")
  then TVS_Fgroup='other non-impute ';

  if TIB_F="R "
  then TIB_Fgroup='reported_edit_passing';
  else IF TIB_F in (" B","RB")
  then do; 
      TIB_Fgroup='Beta_regression';
      if tvs06 ne . and tib06 ne . then TIB_Beta_model7=1;
      else TIB_Beta_model7=0;
  end;
  else if TIB_F in (" S","RS")
  then TIB_Fgroup='Direct substitution (S or RS)';
  else if TIB_F in (" A"," E"," H"," L"," M"," T","RA","RE","RH","RL","RM","RT")
  then TIB_Fgroup = 'other impute';
  else if TIB_F in (" C"," G"," Z","RC","RG","RN","RX","RZ")
  then TIB_Fgroup = 'other non-impute';
  else if missing(TIB_F)
  then TIB_Fgroup = 'flag missing';

  if TIE_F="R "
  then TIE_Fgroup='reported_edit_passing';
  else IF TIE_F in (" B","RB")
  then do;
      TIE_Fgroup='Beta_regression';
      if tvs06 ne . and tie06 ne . then TIE_Beta_model7=1;
      else TIE_Beta_model7=0;
  end;
  else if TIE_F in (" A"," E"," H"," L"," M"," V","RA","RE","RH","RL","RM","RT","RV")
  then TIE_Fgroup = 'other impute';
  else if TIE_F in (" C"," G"," Z","RC","RG","RN","RX","RZ")
  then TIE_Fgroup = 'other non-impute';
  else if missing(TIE_F)
  then TIE_Fgroup = 'flag missing';


  if CM_F="R "
  then CM_Fgroup='reported_edit_passing';
  else IF CM_F in (" B","RB")
  then do;
      CM_Fgroup='Beta regression';
      if tvs06 ne . and cm06 ne . then CM_Beta_model7=1;
      else CM_Beta_model7=0;
  end;
  ELSE IF CM_F in (" L","RL")
  then CM_Fgroup='logical impute';
  else if CM_F in (" E"," H"," M","RE","RH","RL","RM") 
  then CM_Fgroup = "other impute ";
  else if CM_F in( ""," ") 
  then CM_Fgroup='flag missing';
  else if CM_F in (" C","RC","RN"," U")
  then CM_Fgroup="other non-impute";

  IF PW_F2nd ='V'
  then PW_Fgroup='industry_average';
  else if PW_F="R "
  then PW_Fgroup='reported_edit_passing';
  else if PW_F="" 
  then PW_Fgroup='flag missing';
  else if PW_F2nd in ('G','K','N','O','U','X','Z')
  then PW_Fgroup='nonimp_other';
  ELSE IF PW_F2nd ='A'
  then PW_Fgroup='ad_rec';
  ELSE IF PW_F2nd ='C'
  then PW_Fgroup='analyst_corrected';
  ELSE IF PW_F2nd ='S'
  then PW_Fgroup='direct_substitution';
  else if PW_F2nd in ('L','H','D','E','J','M','P','T','W')
  then PW_Fgroup='other_impute';
  else IF PW_F2nd ='B'
  then do;
      PW_Fgroup='Beta_regression';
      if (te06 ne . and pw06 ne .) then PW_Beta_model7=1;
      else PW_Beta_model7=0;
  end;
  else if missing(PW_F)
  then PW_Fgroup = 'flag missing';

  if EE_F="R "
  then EE_Fgroup='reported_edit_passing';
  else IF EE_F in (" V","RV")
  then EE_Fgroup='industry_average';
  else if EE_F in ("RC","RK","RN"," C"," N")
  then EE_Fgroup='other non-impute';
  else if EE_F="" 
  then EE_Fgroup='flag missing';
  else if EE_F in ("RH","RJ","RT"," D"," H"," J"," T"," L","RL")
  then EE_Fgroup='other impute';

  if CF_F="R "
  then CF_Fgroup='reported_edit_passing';
  else IF CF_F in (" V","RV")
  then CF_Fgroup='industry_average';
  else if CF_F in ("RC","RK","RN"," C"," N")
  then CF_Fgroup='other non-impute';
  else if CF_F="" 
  then CF_Fgroup='flag missing';
  else if CF_F in ("RH","RL","RT"," L"," H"," T")
  then CF_Fgroup='other impute';

  IF WW_F2nd ='V'
  then WW_Fgroup='industry_average';
  else if WW_F="R "
  then WW_Fgroup='reported_edit_passing';
  else if WW_F="" 
  then WW_Fgroup='flag missing';
  else if WW_F2nd in ('G','K','N','O','U','X','Z')
  then WW_Fgroup='nonimp_other';
  ELSE IF WW_F2nd ='A'
  then WW_Fgroup='ad_rec';
  ELSE IF WW_F2nd ='C'
  then WW_Fgroup='analyst_corrected';
  ELSE IF WW_F2nd ='S'
  then WW_Fgroup='direct_substitution';
  else if WW_F2nd in ('L','H','D','E','J','M','P','T','W')
  then WW_Fgroup='other_impute';
  else IF WW_F2nd ='B'
  then do;
      WW_Fgroup='Beta_regression';
      if (sw06 ne . and ww06 ne .) or (pw06 ne . and ww06 ne .) then WW_Beta_model7=1;
      else WW_Beta_model7=0;
  end;
  ELSE IF missing(WW_F)
  then WW_Fgroup = 'flag missing';

  if PH_F = "R "
  then PH_Fgroup='reported edit-passing';
  else IF PH_F in (" B"," BF","RB","RBF")
  then do;
      PH_Fgroup='Beta_regression ';
      if (pw06 ne . and ph06 ne .) or (ww06 ne . and ph06 ne .) then PH_Beta_model7=1;
      else PH_Beta_model7=0;
  end;
  else if PH_F in (" E"," H"," HF"," J"," JF"," L"," M","RE","REF","RH","RHF","RJF","RL","RM","RMF")
  then PH_Fgroup='other impute';
  else if PH_F in (""," "," C"," CF","RC","RCF","RN")
  then PH_Fgroup='other non-impute';


  if TAB_F in ("RB"," B")
  then do;
      TAB_Fgroup='Beta regression';
  end;
  else if TAB_F="R "
  then TAB_Fgroup='reported, passes edits';
  elsef if TAB_F in (" V","RV")
  thend TAB_Fgroup ='industry_average';
  else if TAB_F in (" D"," J","RJ")
  then TAB_Fgroup='other impute';
  else if TAB_F in (""," C","RC") 
  then TAB_Fgroup='other non-impute';
  else if missing(TAB_F)
  then TAB_Fgroup = 'flag missing';

  if TAE_F in ("RB"," B")
  then do;
      TAE_Fgroup='Beta regression';
  end;
  else if TAE_F="R "
  then TAE_Fgroup='reported, passes edits';
  elsef if TAE_F in (" V","RV")
  thend TAE_Fgroup ='industry_average';
  else if TAE_F in (" D"," J","RJ")
  then TAE_Fgroup='other impute';
  else if TAE_F in (""," C","RC") 
  then TAE_Fgroup='other non-impute';
  else if missing(TAE_F)
  then TAE_Fgroup = 'flag missing';
run;

proc freq data=allcmf.cmf07asm06;
 tables SW_Fgroup TE_Fgroup TVS_Fgroup TIB_Fgroup TIE_Fgroup CM_Fgroup PW_Fgroup WW_Fgroup PH_Fgroup EE_Fgroup CF_Fgroup TAB_Fgroup TAE_Fgroup /* 
*/ any_impute any_imp_notTIB_TIE any_imp_incl_TAB;
 title1 "Groups of Edit/Impute flags, ";
 title2 "2007 CM, non-AR cases only";
run;


proc freq data=allcmf.cmf07asm06;
 tables TE_Beta_model7 TVS_Beta_model7 TIB_Beta_model7 TIE_Beta_model7 CM_Beta_model7 PW_Beta_model7 WW_Beta_model7 PH_Beta_model7 ;
 title1 "Model 1 (=0) vs. Model 7 Beta imputes, ";
 title2 "2007 CM, non-AR cases only";
run;


/** Disclosure analysis: only need to look at firm counts. */


%macro do_DA(var=);

 * adding up to the firm level;
 proc summary data=allcmf.cmf07asm06 nway;
   class  &var id;
   var tvs;
 output out=chk sum=;

 *adding up to the entire sample level;
 proc summary data=chk nway;
  class  &var;
  var tvs;
 output out=chkall sum=;

 data chkall;
  set chkall;
  firmcount=_freq_;

 title "The number of firms in the sample";run;
 proc print data=chkall;var &var firmcount tvs;


%mend;

/****%do_DA(var=nte_imp);
*%do_DA(var=nsw_imp);
*%do_DA(var=ntvs_imp);
*%do_DA(var=ncm_imp);
*%do_DA(var=npw_imp);
*%do_DA(var=nww_imp);
*%do_DA(var=nph_imp);
*%do_DA(var=ncf_imp);
*%do_DA(var=nee_imp);
*%do_DA(var=ntab_imp);
***/


%macro do_DA_on_groups(var=);

 * adding up to the firm level;
 proc summary data=allcmf.cmf07asm06 nway;
   class &var firmid;
   var tvs;
 output out=chk sum=;

 *adding up to the entire sample level;
 proc summary data=chk nway;
  class  &var;
  var tvs;
 output out=chkall sum=;

 data chkall;
  set chkall;
  firmcount=_freq_;

 title "The number of firms in the sample";run;
 proc print data=chkall;var &var firmcount tvs;


%mend;

%do_DA_on_groups(var=SW_Fgroup);
%do_DA_on_groups(var=TE_Fgroup);
%do_DA_on_groups(var=TVS_Fgroup);
%do_DA_on_groups(var=TIB_Fgroup);
%do_DA_on_groups(var=TIE_Fgroup);
%do_DA_on_groups(var=CM_Fgroup);
%do_DA_on_groups(var=PW_Fgroup);
%do_DA_on_groups(var=WW_Fgroup);
%do_DA_on_groups(var=PH_Fgroup);
%do_DA_on_groups(var=EE_Fgroup);
%do_DA_on_groups(var=CF_Fgroup);
%do_DA_on_groups(var=TAB_Fgroup);
%do_DA_on_groups(var=TAE_Fgroup);


%do_DA_on_groups(var=TE_Beta_model7);
%do_DA_on_groups(var=TVS_Beta_model7);
%do_DA_on_groups(var=TIB_Beta_model7);
%do_DA_on_groups(var=TIE_Beta_model7);
%do_DA_on_groups(var=CM_Beta_model7);
%do_DA_on_groups(var=PW_Beta_model7);
%do_DA_on_groups(var=WW_Beta_model7);
%do_DA_on_groups(var=PH_Beta_model7);

%do_DA_on_groups(var=any_impute);
